import pandas as pd
cities = pd.read_csv('city.csv', sep=';')
cities
ID | Name | CountryCode | District | Population | |
---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 |
1 | 2 | Qandahar | AFG | Qandahar | 237500 |
2 | 3 | Herat | AFG | Herat | 186800 |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 |
... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 |
4075 | 4076 | Hebron | PSE | Hebron | 119401 |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 |
4077 | 4078 | Nablus | PSE | Nablus | 100231 |
4078 | 4079 | Rafah | PSE | Rafah | 92020 |
4079 rows × 5 columns
cities.pivot_table(index='CountryCode', values='Population', aggfunc='sum')
Population | |
---|---|
CountryCode | |
ABW | 29034 |
AFG | 2332100 |
AGO | 2561600 |
AIA | 1556 |
ALB | 270000 |
... | ... |
YEM | 1743700 |
YUG | 2189507 |
ZAF | 15196370 |
ZMB | 2473500 |
ZWE | 2730420 |
232 rows × 1 columns
df = pd.read_csv('ratings.csv')
df
userId | movieId | rating | timestamp | |
---|---|---|---|---|
0 | 1 | 1 | 4.0 | 964982703 |
1 | 1 | 3 | 4.0 | 964981247 |
2 | 1 | 6 | 4.0 | 964982224 |
3 | 1 | 47 | 5.0 | 964983815 |
4 | 1 | 50 | 5.0 | 964982931 |
... | ... | ... | ... | ... |
100831 | 610 | 166534 | 4.0 | 1493848402 |
100832 | 610 | 168248 | 5.0 | 1493850091 |
100833 | 610 | 168250 | 5.0 | 1494273047 |
100834 | 610 | 168252 | 5.0 | 1493846352 |
100835 | 610 | 170875 | 3.0 | 1493846415 |
100836 rows × 4 columns
df[ df.userId == 1 ].rating.value_counts()
5.0 124 4.0 76 3.0 26 2.0 5 1.0 1 Name: rating, dtype: int64
df.groupby('userId')['rating'].value_counts().head(20)
userId rating 1 5.0 124 4.0 76 3.0 26 2.0 5 1.0 1 2 4.0 9 5.0 6 3.0 4 3.5 4 4.5 4 2.0 1 2.5 1 3 0.5 20 5.0 10 4.5 5 2.0 1 3.0 1 3.5 1 4.0 1 4 4.0 64 Name: rating, dtype: int64
df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count')
rating | 0.5 | 1.0 | 1.5 | 2.0 | 2.5 | 3.0 | 3.5 | 4.0 | 4.5 | 5.0 |
---|---|---|---|---|---|---|---|---|---|---|
userId | ||||||||||
1 | NaN | 1.0 | NaN | 5.0 | NaN | 26.0 | NaN | 76.0 | NaN | 124.0 |
2 | NaN | NaN | NaN | 1.0 | 1.0 | 4.0 | 4.0 | 9.0 | 4.0 | 6.0 |
3 | 20.0 | NaN | NaN | 1.0 | NaN | 1.0 | 1.0 | 1.0 | 5.0 | 10.0 |
4 | NaN | 23.0 | NaN | 26.0 | NaN | 39.0 | NaN | 64.0 | NaN | 64.0 |
5 | NaN | 1.0 | NaN | 3.0 | NaN | 17.0 | NaN | 13.0 | NaN | 10.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
606 | 4.0 | 6.0 | 13.0 | 22.0 | 79.0 | 137.0 | 241.0 | 441.0 | 139.0 | 33.0 |
607 | NaN | 3.0 | NaN | 10.0 | NaN | 63.0 | NaN | 59.0 | NaN | 52.0 |
608 | 33.0 | 29.0 | 30.0 | 69.0 | 95.0 | 169.0 | 126.0 | 162.0 | 95.0 | 23.0 |
609 | NaN | NaN | NaN | NaN | NaN | 27.0 | NaN | 10.0 | NaN | NaN |
610 | 3.0 | 13.0 | 11.0 | 42.0 | 74.0 | 230.0 | 315.0 | 286.0 | 148.0 | 180.0 |
610 rows × 10 columns
df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count', fill_value=0)
rating | 0.5 | 1.0 | 1.5 | 2.0 | 2.5 | 3.0 | 3.5 | 4.0 | 4.5 | 5.0 |
---|---|---|---|---|---|---|---|---|---|---|
userId | ||||||||||
1 | 0 | 1 | 0 | 5 | 0 | 26 | 0 | 76 | 0 | 124 |
2 | 0 | 0 | 0 | 1 | 1 | 4 | 4 | 9 | 4 | 6 |
3 | 20 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 5 | 10 |
4 | 0 | 23 | 0 | 26 | 0 | 39 | 0 | 64 | 0 | 64 |
5 | 0 | 1 | 0 | 3 | 0 | 17 | 0 | 13 | 0 | 10 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
606 | 4 | 6 | 13 | 22 | 79 | 137 | 241 | 441 | 139 | 33 |
607 | 0 | 3 | 0 | 10 | 0 | 63 | 0 | 59 | 0 | 52 |
608 | 33 | 29 | 30 | 69 | 95 | 169 | 126 | 162 | 95 | 23 |
609 | 0 | 0 | 0 | 0 | 0 | 27 | 0 | 10 | 0 | 0 |
610 | 3 | 13 | 11 | 42 | 74 | 230 | 315 | 286 | 148 | 180 |
610 rows × 10 columns
df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count', fill_value=0, margins=True)
rating | 0.5 | 1.0 | 1.5 | 2.0 | 2.5 | 3.0 | 3.5 | 4.0 | 4.5 | 5.0 | All |
---|---|---|---|---|---|---|---|---|---|---|---|
userId | |||||||||||
1 | 0 | 1 | 0 | 5 | 0 | 26 | 0 | 76 | 0 | 124 | 232 |
2 | 0 | 0 | 0 | 1 | 1 | 4 | 4 | 9 | 4 | 6 | 29 |
3 | 20 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 5 | 10 | 39 |
4 | 0 | 23 | 0 | 26 | 0 | 39 | 0 | 64 | 0 | 64 | 216 |
5 | 0 | 1 | 0 | 3 | 0 | 17 | 0 | 13 | 0 | 10 | 44 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
607 | 0 | 3 | 0 | 10 | 0 | 63 | 0 | 59 | 0 | 52 | 187 |
608 | 33 | 29 | 30 | 69 | 95 | 169 | 126 | 162 | 95 | 23 | 831 |
609 | 0 | 0 | 0 | 0 | 0 | 27 | 0 | 10 | 0 | 0 | 37 |
610 | 3 | 13 | 11 | 42 | 74 | 230 | 315 | 286 | 148 | 180 | 1302 |
All | 1370 | 2811 | 1791 | 7551 | 5550 | 20047 | 13136 | 26818 | 8551 | 13211 | 100836 |
611 rows × 11 columns
data = df.pivot_table(values='movieId', index='userId', columns='rating', aggfunc='count', fill_value=0, margins=True)
data.sort_values(by=5, ascending=False)
rating | 0.5 | 1.0 | 1.5 | 2.0 | 2.5 | 3.0 | 3.5 | 4.0 | 4.5 | 5.0 | All |
---|---|---|---|---|---|---|---|---|---|---|---|
userId | |||||||||||
All | 1370 | 2811 | 1791 | 7551 | 5550 | 20047 | 13136 | 26818 | 8551 | 13211 | 100836 |
380 | 2 | 16 | 0 | 94 | 13 | 399 | 25 | 391 | 4 | 274 | 1218 |
414 | 1 | 40 | 20 | 398 | 122 | 658 | 232 | 903 | 76 | 248 | 2698 |
275 | 0 | 10 | 0 | 31 | 0 | 62 | 0 | 111 | 0 | 189 | 403 |
305 | 4 | 3 | 3 | 14 | 43 | 103 | 89 | 161 | 75 | 182 | 677 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
132 | 1 | 2 | 12 | 55 | 30 | 114 | 64 | 55 | 14 | 0 | 347 |
311 | 5 | 3 | 2 | 1 | 3 | 8 | 2 | 3 | 1 | 0 | 28 |
478 | 1 | 0 | 2 | 5 | 3 | 2 | 4 | 3 | 2 | 0 | 22 |
293 | 0 | 0 | 0 | 11 | 0 | 7 | 0 | 3 | 0 | 0 | 21 |
133 | 0 | 1 | 0 | 5 | 0 | 22 | 0 | 7 | 0 | 0 | 35 |
611 rows × 11 columns
data.sort_values(by=0.5, ascending=False)
rating | 0.5 | 1.0 | 1.5 | 2.0 | 2.5 | 3.0 | 3.5 | 4.0 | 4.5 | 5.0 | All |
---|---|---|---|---|---|---|---|---|---|---|---|
userId | |||||||||||
All | 1370 | 2811 | 1791 | 7551 | 5550 | 20047 | 13136 | 26818 | 8551 | 13211 | 100836 |
298 | 144 | 41 | 57 | 123 | 163 | 239 | 87 | 76 | 6 | 3 | 939 |
567 | 56 | 38 | 54 | 48 | 53 | 54 | 41 | 16 | 11 | 14 | 385 |
307 | 56 | 57 | 85 | 158 | 136 | 184 | 112 | 126 | 45 | 16 | 975 |
517 | 54 | 59 | 33 | 45 | 42 | 60 | 28 | 41 | 16 | 22 | 400 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
235 | 0 | 1 | 0 | 5 | 0 | 21 | 0 | 27 | 0 | 11 | 65 |
234 | 0 | 7 | 0 | 29 | 0 | 61 | 0 | 65 | 0 | 40 | 202 |
233 | 0 | 2 | 3 | 9 | 14 | 38 | 42 | 33 | 5 | 4 | 150 |
231 | 0 | 0 | 0 | 3 | 0 | 3 | 1 | 11 | 0 | 6 | 24 |
306 | 0 | 0 | 0 | 11 | 9 | 36 | 22 | 27 | 2 | 5 | 112 |
611 rows × 11 columns
sales = pd.read_excel('sales-funnel.xlsx')
sales
Account | Name | Rep | Manager | Product | Quantity | Price | Status | |
---|---|---|---|---|---|---|---|---|
0 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | CPU | 1 | 30000 | presented |
1 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | Software | 1 | 10000 | presented |
2 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | Maintenance | 2 | 5000 | pending |
3 | 737550 | Fritsch, Russel and Anderson | Craig Booker | Debra Henley | CPU | 1 | 35000 | declined |
4 | 146832 | Kiehn-Spinka | Daniel Hilton | Debra Henley | CPU | 2 | 65000 | won |
5 | 218895 | Kulas Inc | Daniel Hilton | Debra Henley | CPU | 2 | 40000 | pending |
6 | 218895 | Kulas Inc | Daniel Hilton | Debra Henley | Software | 1 | 10000 | presented |
7 | 412290 | Jerde-Hilpert | John Smith | Debra Henley | Maintenance | 2 | 5000 | pending |
8 | 740150 | Barton LLC | John Smith | Debra Henley | CPU | 1 | 35000 | declined |
9 | 141962 | Herman LLC | Cedric Moss | Fred Anderson | CPU | 2 | 65000 | won |
10 | 163416 | Purdy-Kunde | Cedric Moss | Fred Anderson | CPU | 1 | 30000 | presented |
11 | 239344 | Stokes LLC | Cedric Moss | Fred Anderson | Maintenance | 1 | 5000 | pending |
12 | 239344 | Stokes LLC | Cedric Moss | Fred Anderson | Software | 1 | 10000 | presented |
13 | 307599 | Kassulke, Ondricka and Metz | Wendy Yule | Fred Anderson | Maintenance | 3 | 7000 | won |
14 | 688981 | Keeling LLC | Wendy Yule | Fred Anderson | CPU | 5 | 100000 | won |
15 | 729833 | Koepp Ltd | Wendy Yule | Fred Anderson | CPU | 2 | 65000 | declined |
16 | 729833 | Koepp Ltd | Wendy Yule | Fred Anderson | Monitor | 2 | 5000 | presented |
sales.pivot_table(index='Name')
Account | Price | Quantity | |
---|---|---|---|
Name | |||
Barton LLC | 740150 | 35000 | 1.000000 |
Fritsch, Russel and Anderson | 737550 | 35000 | 1.000000 |
Herman LLC | 141962 | 65000 | 2.000000 |
Jerde-Hilpert | 412290 | 5000 | 2.000000 |
Kassulke, Ondricka and Metz | 307599 | 7000 | 3.000000 |
Keeling LLC | 688981 | 100000 | 5.000000 |
Kiehn-Spinka | 146832 | 65000 | 2.000000 |
Koepp Ltd | 729833 | 35000 | 2.000000 |
Kulas Inc | 218895 | 25000 | 1.500000 |
Purdy-Kunde | 163416 | 30000 | 1.000000 |
Stokes LLC | 239344 | 7500 | 1.000000 |
Trantow-Barrows | 714466 | 15000 | 1.333333 |
sales.pivot_table(index=['Manager', 'Name'])
Account | Price | Quantity | ||
---|---|---|---|---|
Manager | Name | |||
Debra Henley | Barton LLC | 740150 | 35000 | 1.000000 |
Fritsch, Russel and Anderson | 737550 | 35000 | 1.000000 | |
Jerde-Hilpert | 412290 | 5000 | 2.000000 | |
Kiehn-Spinka | 146832 | 65000 | 2.000000 | |
Kulas Inc | 218895 | 25000 | 1.500000 | |
Trantow-Barrows | 714466 | 15000 | 1.333333 | |
Fred Anderson | Herman LLC | 141962 | 65000 | 2.000000 |
Kassulke, Ondricka and Metz | 307599 | 7000 | 3.000000 | |
Keeling LLC | 688981 | 100000 | 5.000000 | |
Koepp Ltd | 729833 | 35000 | 2.000000 | |
Purdy-Kunde | 163416 | 30000 | 1.000000 | |
Stokes LLC | 239344 | 7500 | 1.000000 |
sales.pivot_table(index=['Manager'], aggfunc='sum')
Account | Price | Quantity | |
---|---|---|---|
Manager | |||
Debra Henley | 4618010 | 235000 | 13 |
Fred Anderson | 3240312 | 287000 | 17 |
sales.groupby('Manager').sum()
Account | Quantity | Price | |
---|---|---|---|
Manager | |||
Debra Henley | 4618010 | 13 | 235000 |
Fred Anderson | 3240312 | 17 | 287000 |
sales.pivot_table(index='Manager', columns='Name', values='Price', aggfunc='sum', fill_value=0, margins=True)
Name | Barton LLC | Fritsch, Russel and Anderson | Herman LLC | Jerde-Hilpert | Kassulke, Ondricka and Metz | Keeling LLC | Kiehn-Spinka | Koepp Ltd | Kulas Inc | Purdy-Kunde | Stokes LLC | Trantow-Barrows | All |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Manager | |||||||||||||
Debra Henley | 35000 | 35000 | 0 | 5000 | 0 | 0 | 65000 | 0 | 50000 | 0 | 0 | 45000 | 235000 |
Fred Anderson | 0 | 0 | 65000 | 0 | 7000 | 100000 | 0 | 70000 | 0 | 30000 | 15000 | 0 | 287000 |
All | 35000 | 35000 | 65000 | 5000 | 7000 | 100000 | 65000 | 70000 | 50000 | 30000 | 15000 | 45000 | 522000 |
sales.pivot_table(index='Name', columns='Manager', values='Price', aggfunc='sum', fill_value=0, margins=True)
Manager | Debra Henley | Fred Anderson | All |
---|---|---|---|
Name | |||
Barton LLC | 35000 | 0 | 35000 |
Fritsch, Russel and Anderson | 35000 | 0 | 35000 |
Herman LLC | 0 | 65000 | 65000 |
Jerde-Hilpert | 5000 | 0 | 5000 |
Kassulke, Ondricka and Metz | 0 | 7000 | 7000 |
Keeling LLC | 0 | 100000 | 100000 |
Kiehn-Spinka | 65000 | 0 | 65000 |
Koepp Ltd | 0 | 70000 | 70000 |
Kulas Inc | 50000 | 0 | 50000 |
Purdy-Kunde | 0 | 30000 | 30000 |
Stokes LLC | 0 | 15000 | 15000 |
Trantow-Barrows | 45000 | 0 | 45000 |
All | 235000 | 287000 | 522000 |
sales.pivot_table(index='Name', columns=['Manager', 'Quantity'], values='Price', aggfunc='sum', fill_value=0, margins=True)
Manager | Debra Henley | Fred Anderson | All | ||||
---|---|---|---|---|---|---|---|
Quantity | 1 | 2 | 1 | 2 | 3 | 5 | |
Name | |||||||
Barton LLC | 35000 | 0 | 0 | 0 | 0 | 0 | 35000 |
Fritsch, Russel and Anderson | 35000 | 0 | 0 | 0 | 0 | 0 | 35000 |
Herman LLC | 0 | 0 | 0 | 65000 | 0 | 0 | 65000 |
Jerde-Hilpert | 0 | 5000 | 0 | 0 | 0 | 0 | 5000 |
Kassulke, Ondricka and Metz | 0 | 0 | 0 | 0 | 7000 | 0 | 7000 |
Keeling LLC | 0 | 0 | 0 | 0 | 0 | 100000 | 100000 |
Kiehn-Spinka | 0 | 65000 | 0 | 0 | 0 | 0 | 65000 |
Koepp Ltd | 0 | 0 | 0 | 70000 | 0 | 0 | 70000 |
Kulas Inc | 10000 | 40000 | 0 | 0 | 0 | 0 | 50000 |
Purdy-Kunde | 0 | 0 | 30000 | 0 | 0 | 0 | 30000 |
Stokes LLC | 0 | 0 | 15000 | 0 | 0 | 0 | 15000 |
Trantow-Barrows | 40000 | 5000 | 0 | 0 | 0 | 0 | 45000 |
All | 120000 | 115000 | 45000 | 135000 | 7000 | 100000 | 522000 |
sales.pivot_table(index='Name', columns='Manager', values='Price', aggfunc=['sum', 'count'], fill_value=0, margins=True)
sum | count | |||||
---|---|---|---|---|---|---|
Manager | Debra Henley | Fred Anderson | All | Debra Henley | Fred Anderson | All |
Name | ||||||
Barton LLC | 35000 | 0 | 35000 | 1 | 0 | 1 |
Fritsch, Russel and Anderson | 35000 | 0 | 35000 | 1 | 0 | 1 |
Herman LLC | 0 | 65000 | 65000 | 0 | 1 | 1 |
Jerde-Hilpert | 5000 | 0 | 5000 | 1 | 0 | 1 |
Kassulke, Ondricka and Metz | 0 | 7000 | 7000 | 0 | 1 | 1 |
Keeling LLC | 0 | 100000 | 100000 | 0 | 1 | 1 |
Kiehn-Spinka | 65000 | 0 | 65000 | 1 | 0 | 1 |
Koepp Ltd | 0 | 70000 | 70000 | 0 | 2 | 2 |
Kulas Inc | 50000 | 0 | 50000 | 2 | 0 | 2 |
Purdy-Kunde | 0 | 30000 | 30000 | 0 | 1 | 1 |
Stokes LLC | 0 | 15000 | 15000 | 0 | 2 | 2 |
Trantow-Barrows | 45000 | 0 | 45000 | 3 | 0 | 3 |
All | 235000 | 287000 | 522000 | 9 | 8 | 17 |